library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
library(lmtest)
library(pracma)
library(lmtest)
library(forecast)
library(vars)
library(rvest)
library(RSelenium)
library(seleniumPipes)
library(dLagM)
library(jsonlite)
library(rgdal)
library(esri2sf)
library(readr)

options(
  tigris_class = "sf",
  tigris_use_cache = TRUE
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

# SET your path here to the covid19analysis folder.
sg_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/'

sg_hourly_path <- '/Users/simonespeizer/pCloud Drive/Shared/SFBI/Restricted Data Library/Safegraph/covid19analysis/weekly-patterns/v2/hourly/'

Case data

Load SFC and Alameda case data, and SCC case data.

# block groups
sf_blockgroups <-
  block_groups("CA","San Francisco", cb=F, progress_bar=F) %>%
  st_transform('+proj=longlat +datum=WGS84')

ac_blockgroups <-
  block_groups("CA","Alameda", cb=F, progress_bar=F) %>%
  st_transform('+proj=longlat +datum=WGS84')

sf_bgs <- sf_blockgroups %>% pull(GEOID)

ac_bgs <- ac_blockgroups %>% pull(GEOID)

# zip code areas
zctas_94 <- 
  zctas(cb = F, starts_with = "94") %>% 
  st_transform('+proj=longlat +datum=WGS84') %>%
  dplyr::select(ZCTA5CE10, geometry)

zctas_95 <- 
  zctas(cb = F, starts_with = "95") %>% 
  st_transform('+proj=longlat +datum=WGS84') %>%
  dplyr::select(ZCTA5CE10, geometry)

zctas_94_95 <- rbind(zctas_94, zctas_95)

# join with block groups
sf_bg_zctas <- sf_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94_95) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

ac_bg_zctas <- ac_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94_95) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)


# get SF case data
sf_place_cases <- 
  read_csv("https://raw.githubusercontent.com/datadesk/california-coronavirus-data/master/latimes-place-totals.csv") %>%
  filter(county == 'San Francisco')

# get population data for San Francisco
acs_vars = readRDS("/Users/simonespeizer/CEE 218Z/censusData2018_acs_acs5.rds")

# define a function for pulling census data
pullCensus <- function(variableToPull, county) {
    regionString <- paste0("state:06+county:", county)
    censusData <- getCensus(
      name = "acs/acs5",
      vintage = 2018,
      region = "block group:*", 
      regionin = regionString,
      vars = variableToPull
    ) %>%
    mutate(blockgroup = paste0(state,county,tract,block_group)) %>%
      select_if(!names(.) %in% c("GEO_ID","state","county","tract","block_group","NAME"))
  
  return(censusData)
}

sf_fips <- fips("CA", "San Francisco") %>% substr(3,5)

# getting population by zip code
sf_pop_zip <- pullCensus("B01003_001E", sf_fips) %>% 
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_pop_zip = sum(B01003_001E))

# join with cases and calculate cases per person
sf_cases_zip <- sf_place_cases %>% left_join(sf_pop_zip, by = c("place" = "zipcode")) %>%
  mutate(cases_by_pop = confirmed_cases / total_pop_zip) %>%
  rename(zipcode = place)

# get Alameda County case data - downloaded manually
ac_place_cases <- read.csv("Simone_Speizer/Alameda_County_Cumulative_Cases_By_Place_And_Zip.csv")

# handle the ones that are reported as <10 by calling them 5 cases. Note this is a simplification/choice I made and could be changed.
ac_cases_zip <- ac_place_cases %>% 
  rename(date = DtCreate) %>%
  mutate(date = date %>%  substr(1,10) %>% as.Date()) %>%
  dplyr::select(c(date, contains("F9"))) %>% # only select zip code data
  gather(key = "zipcode", value = "cases", -date) %>%
  mutate(cases = ifelse(cases == "<10", "5", cases), 
         zipcode = zipcode %>% substr(2,6)) %>% # replace cases <10 with 10 and remove the "F" from zipcode names
  mutate(cases = as.numeric(cases))

# get Alameda County populations by zip code
ac_fips <- fips("CA", "Alameda") %>% substr(3,5)

# calculate population by zip code
ac_pop_zip <- pullCensus("B01003_001E", ac_fips) %>% 
  left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_pop_zip = sum(B01003_001E))

# join with cases
ac_cases_zip <- ac_cases_zip %>% left_join(ac_pop_zip) %>%
  mutate(cases_by_pop = cases / total_pop_zip)

Demographic correlations

First pull the demographic information by zip code.

# block group populations
sf_pop_bg <- pullCensus("B01003_001E", sf_fips) %>% 
  rename(total_pop = B01003_001E)

ac_pop_bg <- pullCensus("B01003_001E", ac_fips) %>% 
  rename(total_pop = B01003_001E)

sf_avg_household_size <- pullCensus("B25010_001E", sf_fips) %>% 
  filter(B25010_001E > 0) %>% # some had negative values
  left_join(sf_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  left_join(sf_pop_bg) %>%
  group_by(zipcode) %>%
  summarize(avg_household_size = weighted.mean(B25010_001E, total_pop)) %>%
  filter(!is.na(zipcode)) 

# ALAMEDA
# average household size
ac_avg_household_size <- pullCensus("B25010_001E", ac_fips) %>% 
  filter(B25010_001E > 0) %>%
  left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  left_join(ac_pop_bg) %>%
  group_by(zipcode) %>%
  summarize(avg_household_size = weighted.mean(B25010_001E, total_pop)) %>%
  filter(!is.na(zipcode))

# occupants per room
ac_occupants_zip <- pullCensus("group(B25014)", ac_fips) %>% 
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>% 
  filter(!is.na(`occupants per room`)) %>%
  group_by(blockgroup, `occupants per room`) %>%
  summarize(estimate_tot = sum(estimate)) %>% 
  spread(key = `occupants per room`, value = estimate_tot) %>%
  mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) %>%
  left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_0.5less = sum(`0.50 or less occupants per room`), total_0.5to1 = sum(`0.51 to 1.00 occupants per room`), total_1to1.5 = sum(`1.01 to 1.50 occupants per room`), total_1.5to2 = sum(`1.51 to 2.00 occupants per room`), total_2more = sum(`2.01 or more occupants per room`)) %>%
  mutate(`percent more than 1 occupant` = (total_1to1.5 + total_1.5to2 + total_2more) * 100/ total_nums, `percent less than 1 occupant` = (100-`percent more than 1 occupant`), `percent 0.5 or less occupants` = total_0.5less*100/total_nums, `percent more than 0.5 occupants` = (100-`percent 0.5 or less occupants`), `percent more than 2 occupants` = total_2more*100/total_nums)

# population density
ac_pop_density_zip <- ac_cases_zip %>% filter(date == max(date)) %>%
  dplyr::select(zipcode, total_pop_zip) %>%
  left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>%
  st_as_sf() %>%
  mutate(zip_area = st_area(.), pop_density = total_pop_zip / zip_area) %>%
  filter(!is.na(pop_density))

# bucketed household size
ac_house_size_zip <- pullCensus("group(B11016)", ac_fips) %>% 
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, "type", "size"), sep = "!!") %>% 
  filter(!is.na(type)) %>%
  filter(!is.na(size)) %>% 
  dplyr::select(-type) %>%
  group_by(blockgroup, size) %>%
  summarize(`total of this size` = sum(estimate)) %>% 
  spread(key = size, value = `total of this size`) %>%
  mutate(total_nums = `1-person household` + `2-person household` + `3-person household` + `4-person household` + `5-person household`+ `6-person household` + `7-or-more person household`) %>%
  left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_1 = sum(`1-person household`), total_2 = sum(`2-person household`), total_3 = sum(`3-person household`), total_4 = sum(`4-person household`), total_5 = sum(`5-person household`), total_6 = sum(`6-person household`), total_7more = sum(`7-or-more person household`)) %>%
  mutate(`percent 5 or more` = (total_5 + total_6 + total_7more)* 100/ total_nums, `percent 1 or 2 only` = (total_1 + total_2)*100/total_nums)

# units in structure
ac_units_zip <- pullCensus("group(B25024)", ac_fips) %>% dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, "units"), sep = "!!") %>% 
  filter(!is.na(units)) %>%
  spread(key = units, value = estimate) %>%
  mutate(total_nums = `1, attached` + `1, detached` + `10 to 19` + `2` + `20 to 49`+ `3 or 4` + `5 to 9`+ `50 or more`+ `Boat, RV, van, etc.`+ `Mobile home`, total_20more = `20 to 49`+`50 or more`, total_10more = `10 to 19` + `20 to 49`+`50 or more`, total_1 = `1, attached` + `1, detached`) %>%
  left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_20more = sum(total_20more), total_10more = sum(total_20more), total_1_only = sum(total_1)) %>%
  mutate(`percent 10 or more units` = total_10more*100/total_nums, `percent 20 or more units` = total_20more*100/total_nums, `percent 1 only` = total_1_only*100/total_nums, `percent more than 1 unit` = (100 - `percent 1 only`))

# income
ac_income_zip <- pullCensus("group(B19001)", ac_fips) %>% dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, "income"), sep = "!!") %>% 
  filter(!is.na(income)) %>%
  spread(key = income, value = estimate) %>%
  mutate(total_nums = `Less than $10,000` + `$10,000 to $14,999` + `$15,000 to $19,999` + `$20,000 to $24,999` + `$25,000 to $29,999` + `$30,000 to $34,999` + `$35,000 to $39,999` + `$40,000 to $44,999` + `$45,000 to $49,999` + `$50,000 to $59,999` + `$60,000 to $74,999` + `$75,000 to $99,999` + `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_75000 = `$75,000 to $99,999` + `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_100000 = `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_125000 = `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`) %>%
  left_join(ac_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_over_75000 = sum(over_75000), total_over_100000 = sum(over_100000), total_over_125000 = sum(over_125000)) %>%
  mutate(percent_over_75000 = total_over_75000*100 / total_nums,
         percent_under_75000 = (100 - percent_over_75000),
         percent_over_100000 = total_over_100000*100 / total_nums,
         percent_under_100000 = (100 - percent_over_100000),
         percent_over_125000 = total_over_125000*100 / total_nums,
        percent_under_125000 = (100 - percent_over_125000))

ac_curr_cases_dem <- left_join(ac_cases_zip %>% filter(date == max(date)), ac_avg_household_size) %>% left_join(ac_pop_density_zip %>% dplyr::select(pop_density, zipcode)) %>%  left_join(ac_occupants_zip %>% dplyr::select(`percent more than 1 occupant`, `percent more than 0.5 occupants`, `percent more than 2 occupants`, zipcode)) %>% left_join(ac_units_zip %>% dplyr::select(`percent 10 or more units`, `percent more than 1 unit`, zipcode)) %>% left_join(ac_income_zip %>% dplyr::select(percent_under_75000, percent_under_100000, percent_under_125000, zipcode)) %>% as.data.frame() %>% 
  filter(!is.na(zipcode) & !is.na(avg_household_size)) %>%
  mutate(log_cases_by_pop = log(cases_by_pop))

# SANTA CLARA
# block groups
scc_blockgroups <-
  block_groups("CA","Santa Clara", cb=F, progress_bar=F) %>%
  st_transform('+proj=longlat +datum=WGS84')

scc_bgs <- scc_blockgroups %>% pull(GEOID)

# join with zip codes
scc_bg_zctas <- scc_blockgroups %>% 
  st_centroid() %>%
  st_join(zctas_94_95) %>% 
  dplyr::select(GEOID, ZCTA5CE10) %>%
  rename(blockgroup = GEOID, zipcode = ZCTA5CE10)

# get case data
scc_map_cases <- esri2sf("https://services2.arcgis.com/RiZWfy7B1r76pKTz/arcgis/rest/services/COVID_zip_FC/FeatureServer/0")
## [1] "Feature Layer"
## [1] "esriGeometryPolygon"
scc_cases_zip <- scc_map_cases %>% 
  dplyr::select(zipcode, Cases, Population) %>% 
  st_drop_geometry() %>%
  rename(cases = Cases, pop = Population) %>%
  mutate(cases = replace_na(cases, 0)) %>%
  mutate(cases_by_pop = cases/pop) %>%
  filter(is.numeric(cases_by_pop))

# get population data 
scc_fips <- fips("CA", "Santa Clara") %>% substr(3,5)

scc_pop_zip <- pullCensus("B01003_001E", scc_fips) %>% 
  left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_pop_zip = sum(B01003_001E))

scc_pop_bg <- pullCensus("B01003_001E", scc_fips) %>%
  rename(total_pop = B01003_001E)

# average household size
scc_avg_household_size <- pullCensus("B25010_001E", scc_fips) %>% 
  filter(B25010_001E > 0) %>%
  left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  left_join(scc_pop_bg) %>%
  group_by(zipcode) %>%
  summarize(avg_household_size = weighted.mean(B25010_001E, total_pop)) %>%
  filter(!is.na(zipcode))

# occupants per room
scc_occupants_zip <- pullCensus("group(B25014)", scc_fips) %>% 
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>% 
  filter(!is.na(`occupants per room`)) %>%
  group_by(blockgroup, `occupants per room`) %>%
  summarize(estimate_tot = sum(estimate)) %>% 
  spread(key = `occupants per room`, value = estimate_tot) %>%
  mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) %>%
  left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_0.5less = sum(`0.50 or less occupants per room`), total_0.5to1 = sum(`0.51 to 1.00 occupants per room`), total_1to1.5 = sum(`1.01 to 1.50 occupants per room`), total_1.5to2 = sum(`1.51 to 2.00 occupants per room`), total_2more = sum(`2.01 or more occupants per room`)) %>%
  mutate(`percent more than 1 occupant` = (total_1to1.5 + total_1.5to2 + total_2more) * 100/ total_nums, `percent less than 1 occupant` = (100-`percent more than 1 occupant`), `percent 0.5 or less occupants` = total_0.5less*100/total_nums, `percent more than 0.5 occupants` = (100-`percent 0.5 or less occupants`), `percent more than 2 occupants` = total_2more*100/total_nums)

# population density
scc_pop_density_zip <- scc_cases_zip %>%
  dplyr::select(zipcode, pop) %>%
  rename(total_pop_zip = pop) %>%
  left_join(zctas_94_95, by = c("zipcode" = "ZCTA5CE10")) %>%
  st_as_sf() %>%
  mutate(zip_area = st_area(.), pop_density = total_pop_zip / zip_area) %>%
  filter(!is.na(pop_density))

# bucketed household size
scc_house_size_zip <- pullCensus("group(B11016)", scc_fips) %>% 
  dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, "type", "size"), sep = "!!") %>% 
  filter(!is.na(type)) %>%
  filter(!is.na(size)) %>% 
  dplyr::select(-type) %>%
  group_by(blockgroup, size) %>%
  summarize(`total of this size` = sum(estimate)) %>% 
  spread(key = size, value = `total of this size`) %>%
  mutate(total_nums = `1-person household` + `2-person household` + `3-person household` + `4-person household` + `5-person household`+ `6-person household` + `7-or-more person household`) %>%
  left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_1 = sum(`1-person household`), total_2 = sum(`2-person household`), total_3 = sum(`3-person household`), total_4 = sum(`4-person household`), total_5 = sum(`5-person household`), total_6 = sum(`6-person household`), total_7more = sum(`7-or-more person household`)) %>%
  mutate(`percent 5 or more` = (total_5 + total_6 + total_7more)* 100/ total_nums, `percent 1 or 2 only` = (total_1 + total_2)*100/total_nums)

# units in structure
scc_units_zip <- pullCensus("group(B25024)", scc_fips) %>% dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, "units"), sep = "!!") %>% 
  filter(!is.na(units)) %>%
  spread(key = units, value = estimate) %>%
  mutate(total_nums = `1, attached` + `1, detached` + `10 to 19` + `2` + `20 to 49`+ `3 or 4` + `5 to 9`+ `50 or more`+ `Boat, RV, van, etc.`+ `Mobile home`, total_20more = `20 to 49`+`50 or more`, total_10more = `10 to 19` + `20 to 49`+`50 or more`, total_1 = `1, attached` + `1, detached`) %>%
  left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_20more = sum(total_20more), total_10more = sum(total_20more), total_1_only = sum(total_1)) %>%
  mutate(`percent 10 or more units` = total_10more*100/total_nums, `percent 20 or more units` = total_20more*100/total_nums, `percent 1 only` = total_1_only*100/total_nums, `percent more than 1 unit` = (100 - `percent 1 only`))

# income
scc_income_zip <- pullCensus("group(B19001)", scc_fips) %>% dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>% 
  gather(key = "variable", value = "estimate", -blockgroup) %>% 
  mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>% 
  dplyr::select(-variable) %>% 
  separate(label, into = c(NA, NA, "income"), sep = "!!") %>% 
  filter(!is.na(income)) %>%
  spread(key = income, value = estimate) %>%
  mutate(total_nums = `Less than $10,000` + `$10,000 to $14,999` + `$15,000 to $19,999` + `$20,000 to $24,999` + `$25,000 to $29,999` + `$30,000 to $34,999` + `$35,000 to $39,999` + `$40,000 to $44,999` + `$45,000 to $49,999` + `$50,000 to $59,999` + `$60,000 to $74,999` + `$75,000 to $99,999` + `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_75000 = `$75,000 to $99,999` + `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_100000 = `$100,000 to $124,999` + `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`, over_125000 = `$125,000 to $149,999` + `$150,000 to $199,999` + `$200,000 or more`) %>%
  left_join(scc_bg_zctas %>% dplyr::select(blockgroup, zipcode)) %>% 
  group_by(zipcode) %>%
  summarize(total_nums = sum(total_nums), total_over_75000 = sum(over_75000), total_over_100000 = sum(over_100000), total_over_125000 = sum(over_125000)) %>%
  mutate(percent_over_75000 = total_over_75000*100 / total_nums,
         percent_under_75000 = (100 - percent_over_75000),
         percent_over_100000 = total_over_100000*100 / total_nums,
         percent_under_100000 = (100 - percent_over_100000),
         percent_over_125000 = total_over_125000*100 / total_nums,
        percent_under_125000 = (100 - percent_over_125000))

scc_curr_cases_dem <- left_join(scc_cases_zip, scc_avg_household_size) %>% left_join(scc_pop_density_zip %>% dplyr::select(pop_density, zipcode)) %>%  left_join(scc_occupants_zip %>% dplyr::select(`percent more than 1 occupant`, `percent more than 0.5 occupants`, `percent more than 2 occupants`, zipcode)) %>% left_join(scc_units_zip %>% dplyr::select(`percent 10 or more units`, `percent more than 1 unit`, zipcode)) %>% left_join(scc_income_zip %>% dplyr::select(percent_under_75000, percent_under_100000, percent_under_125000, zipcode)) %>% as.data.frame() %>%
  filter(!is.na(zipcode) & !is.na(avg_household_size)) %>%
  mutate(log_cases_by_pop = log(cases_by_pop))

Absolute number of cases

For all of these, I show the correlation with cases per capita and correlation with log of cases per capita.

Household size.

# Alameda
fig_ac_cases_hhs <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~avg_household_size, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~avg_household_size, y = fitted(lm(cases_by_pop~ avg_household_size, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Household size, population weighted average'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_hhs
ac_cases_hhs_model <- lm(cases_by_pop~ avg_household_size, ac_curr_cases_dem)

summary(ac_cases_hhs_model)
## 
## Call:
## lm(formula = cases_by_pop ~ avg_household_size, data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0020362 -0.0010094 -0.0001468  0.0007284  0.0049034 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.0032338  0.0013476  -2.400 0.020707 *  
## avg_household_size  0.0017964  0.0004724   3.803 0.000438 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001446 on 44 degrees of freedom
## Multiple R-squared:  0.2473, Adjusted R-squared:  0.2302 
## F-statistic: 14.46 on 1 and 44 DF,  p-value: 0.0004376
# also do with log of cases
ac_cases_hhs_model_log <- lm(log_cases_by_pop~ avg_household_size, ac_curr_cases_dem)

summary(ac_cases_hhs_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ avg_household_size, data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.25329 -0.64851  0.06326  0.66328  1.49219 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -8.9684     0.6862 -13.069  < 2e-16 ***
## avg_household_size   0.8257     0.2406   3.432  0.00131 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7366 on 44 degrees of freedom
## Multiple R-squared:  0.2112, Adjusted R-squared:  0.1932 
## F-statistic: 11.78 on 1 and 44 DF,  p-value: 0.001315
# SCC
fig_scc_cases_hhs <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~avg_household_size, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~avg_household_size, y = fitted(lm(cases_by_pop~ avg_household_size, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Household size, population weighted average'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_hhs
scc_cases_hhs_model <- lm(cases_by_pop~ avg_household_size, scc_curr_cases_dem)

summary(scc_cases_hhs_model)
## 
## Call:
## lm(formula = cases_by_pop ~ avg_household_size, data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0017069 -0.0004330 -0.0000825  0.0001695  0.0038324 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)        -0.0007294  0.0006548  -1.114  0.27031   
## avg_household_size  0.0006462  0.0002144   3.015  0.00394 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000879 on 53 degrees of freedom
## Multiple R-squared:  0.1464, Adjusted R-squared:  0.1303 
## F-statistic: 9.088 on 1 and 53 DF,  p-value: 0.003943
# log 
scc_cases_hhs_model_log <- lm(log_cases_by_pop~ avg_household_size, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_hhs_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ avg_household_size, data = scc_curr_cases_dem %>% 
##     filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.96137 -0.32319 -0.00645  0.17926  1.81371 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -8.1615     0.4201 -19.427  < 2e-16 ***
## avg_household_size   0.4700     0.1366   3.439  0.00125 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5049 on 46 degrees of freedom
## Multiple R-squared:  0.2046, Adjusted R-squared:  0.1873 
## F-statistic: 11.83 on 1 and 46 DF,  p-value: 0.00125

Population density

# Alameda
fig_ac_cases_pop_dens <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~pop_density, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~pop_density, y = fitted(lm(cases_by_pop~ pop_density, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Population density'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_pop_dens
ac_cases_pop_dens_model <- lm(cases_by_pop~ pop_density, ac_curr_cases_dem)

summary(ac_cases_pop_dens_model)
## 
## Call:
## lm(formula = cases_by_pop ~ pop_density, data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0017806 -0.0011573 -0.0005960  0.0003101  0.0058180 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0017236  0.0003775   4.566 3.99e-05 ***
## pop_density 0.0376935  0.1057325   0.356    0.723    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001665 on 44 degrees of freedom
## Multiple R-squared:  0.00288,    Adjusted R-squared:  -0.01978 
## F-statistic: 0.1271 on 1 and 44 DF,  p-value: 0.7232
ac_cases_pop_dens_model_log <- lm(log_cases_by_pop~ pop_density, ac_curr_cases_dem)

summary(ac_cases_pop_dens_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ pop_density, data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.26522 -0.70930  0.05947  0.46639  1.80817 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -6.624      0.188 -35.233   <2e-16 ***
## pop_density   -6.977     52.659  -0.132    0.895    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8292 on 44 degrees of freedom
## Multiple R-squared:  0.0003988,  Adjusted R-squared:  -0.02232 
## F-statistic: 0.01755 on 1 and 44 DF,  p-value: 0.8952
# SCC
fig_scc_cases_pop_dens <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~pop_density, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~pop_density, y = fitted(lm(cases_by_pop~ pop_density, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Population density'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_pop_dens
scc_cases_pop_dens_model <- lm(cases_by_pop~ pop_density, scc_curr_cases_dem)

summary(scc_cases_pop_dens_model)
## 
## Call:
## lm(formula = cases_by_pop ~ pop_density, data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0018121 -0.0005262 -0.0000675  0.0003253  0.0033551 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0009785  0.0002150   4.551 3.16e-05 ***
## pop_density 0.1002471  0.0747943   1.340    0.186    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009357 on 53 degrees of freedom
## Multiple R-squared:  0.03278,    Adjusted R-squared:  0.01453 
## F-statistic: 1.796 on 1 and 53 DF,  p-value: 0.1859
scc_cases_pop_dens_model_log <- lm(log_cases_by_pop~ pop_density, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_pop_dens_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ pop_density, data = scc_curr_cases_dem %>% 
##     filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.11800 -0.43629  0.02966  0.30559  1.48172 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -6.9372     0.1489 -46.592   <2e-16 ***
## pop_density  84.0741    53.2314   1.579    0.121    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5513 on 46 degrees of freedom
## Multiple R-squared:  0.05144,    Adjusted R-squared:  0.03082 
## F-statistic: 2.495 on 1 and 46 DF,  p-value: 0.1211

Occupants per room.

# Alameda
fig_ac_cases_occ_1 <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~`percent more than 1 occupant`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent more than 1 occupant`, y = fitted(lm(cases_by_pop~ `percent more than 1 occupant`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent more than 1 occupant per room'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_occ_1
ac_cases_occ_1_model <- lm(cases_by_pop~ `percent more than 1 occupant`, ac_curr_cases_dem)

summary(ac_cases_occ_1_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent more than 1 occupant`, data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0022719 -0.0005746 -0.0000026  0.0005459  0.0035099 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    1.384e-04  2.690e-04   0.515    0.609    
## `percent more than 1 occupant` 2.459e-04  3.157e-05   7.789 8.13e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001081 on 44 degrees of freedom
## Multiple R-squared:  0.5796, Adjusted R-squared:  0.5701 
## F-statistic: 60.66 on 1 and 44 DF,  p-value: 8.127e-10
ac_cases_occ_1_model_log <- lm(log_cases_by_pop~ `percent more than 1 occupant`, ac_curr_cases_dem)

summary(ac_cases_occ_1_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 1 occupant`, 
##     data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.64926 -0.35425  0.07074  0.28947  1.51698 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -7.40116    0.14979 -49.410  < 2e-16 ***
## `percent more than 1 occupant`  0.11048    0.01758   6.284 1.29e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6021 on 44 degrees of freedom
## Multiple R-squared:  0.473,  Adjusted R-squared:  0.461 
## F-statistic: 39.49 on 1 and 44 DF,  p-value: 1.288e-07
# SCC
fig_scc_cases_occ_1 <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~`percent more than 1 occupant`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent more than 1 occupant`, y = fitted(lm(cases_by_pop~ `percent more than 1 occupant`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent more than 1 occupant per room'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_occ_1
scc_cases_occ_1_model <- lm(cases_by_pop~ `percent more than 1 occupant`, scc_curr_cases_dem)

summary(scc_cases_occ_1_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent more than 1 occupant`, data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0015011 -0.0005152 -0.0000740  0.0003815  0.0033006 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    4.879e-04  1.808e-04   2.699  0.00932 ** 
## `percent more than 1 occupant` 9.740e-05  1.969e-05   4.947 8.01e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0007869 on 53 degrees of freedom
## Multiple R-squared:  0.3159, Adjusted R-squared:  0.3029 
## F-statistic: 24.47 on 1 and 53 DF,  p-value: 8.013e-06
scc_cases_occ_1_model_log <- lm(log_cases_by_pop~ `percent more than 1 occupant`, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_occ_1_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 1 occupant`, 
##     data = scc_curr_cases_dem %>% filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.10401 -0.22420 -0.01544  0.29984  1.40296 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -7.12825    0.12395 -57.509  < 2e-16 ***
## `percent more than 1 occupant`  0.04958    0.01291   3.839 0.000375 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4926 on 46 degrees of freedom
## Multiple R-squared:  0.2427, Adjusted R-squared:  0.2262 
## F-statistic: 14.74 on 1 and 46 DF,  p-value: 0.0003752

That is very high! Try with more than 0.5 occupants.

# Alameda
fig_ac_cases_occ_0.5 <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~`percent more than 0.5 occupants`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent more than 0.5 occupants`, y = fitted(lm(cases_by_pop~ `percent more than 0.5 occupants`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent more than 0.5 occupants per room'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_occ_0.5
ac_cases_occ_0.5_model <- lm(cases_by_pop~ `percent more than 0.5 occupants`, ac_curr_cases_dem)

summary(ac_cases_occ_0.5_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent more than 0.5 occupants`, 
##     data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0024300 -0.0009461 -0.0001091  0.0004828  0.0046568 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -1.101e-03  7.628e-04  -1.443 0.156185    
## `percent more than 0.5 occupants`  7.081e-05  1.774e-05   3.991 0.000245 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001429 on 44 degrees of freedom
## Multiple R-squared:  0.2658, Adjusted R-squared:  0.2491 
## F-statistic: 15.93 on 1 and 44 DF,  p-value: 0.0002454
ac_cases_occ_0.5_model_log <- lm(log_cases_by_pop~ `percent more than 0.5 occupants`, ac_curr_cases_dem)

summary(ac_cases_occ_0.5_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 0.5 occupants`, 
##     data = ac_curr_cases_dem)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7782 -0.4802  0.0876  0.4686  1.6359 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -8.133669   0.376065 -21.628  < 2e-16 ***
## `percent more than 0.5 occupants`  0.036073   0.008746   4.124 0.000162 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7043 on 44 degrees of freedom
## Multiple R-squared:  0.2788, Adjusted R-squared:  0.2624 
## F-statistic: 17.01 on 1 and 44 DF,  p-value: 0.0001623
# SCC
fig_scc_cases_occ_0.5 <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~`percent more than 0.5 occupants`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent more than 0.5 occupants`, y = fitted(lm(cases_by_pop~ `percent more than 0.5 occupants`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent more than 0.5 occupants per room'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_occ_0.5
scc_cases_occ_0.5_model <- lm(cases_by_pop~ `percent more than 0.5 occupants`, scc_curr_cases_dem)

summary(scc_cases_occ_0.5_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent more than 0.5 occupants`, 
##     data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0018484 -0.0004710 -0.0000728  0.0003707  0.0031909 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                       -2.822e-04  4.845e-04  -0.582  0.56275   
## `percent more than 0.5 occupants`  3.315e-05  1.043e-05   3.178  0.00247 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008719 on 53 degrees of freedom
## Multiple R-squared:  0.1601, Adjusted R-squared:  0.1442 
## F-statistic:  10.1 on 1 and 53 DF,  p-value: 0.002472
scc_cases_occ_0.5_model_log <- lm(log_cases_by_pop~ `percent more than 0.5 occupants`, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_occ_0.5_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 0.5 occupants`, 
##     data = scc_curr_cases_dem %>% filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.15948 -0.26884  0.04851  0.29399  1.33667 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                       -7.671811   0.311448 -24.633  < 2e-16 ***
## `percent more than 0.5 occupants`  0.020587   0.006671   3.086  0.00343 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5152 on 46 degrees of freedom
## Multiple R-squared:  0.1715, Adjusted R-squared:  0.1535 
## F-statistic: 9.524 on 1 and 46 DF,  p-value: 0.003428

Try 2 or more.

# Alameda
fig_ac_cases_occ_2 <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~`percent more than 2 occupants`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent more than 2 occupants`, y = fitted(lm(cases_by_pop~ `percent more than 2 occupants`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent more than 2 occupants per room'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_occ_2
ac_cases_occ_2_model <- lm(cases_by_pop~ `percent more than 2 occupants`, ac_curr_cases_dem)

summary(ac_cases_occ_2_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent more than 2 occupants`, 
##     data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0021828 -0.0006838 -0.0003256  0.0006245  0.0033987 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.0009738  0.0002440   3.991 0.000246 ***
## `percent more than 2 occupants` 0.0014473  0.0002622   5.520  1.7e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001282 on 44 degrees of freedom
## Multiple R-squared:  0.4092, Adjusted R-squared:  0.3957 
## F-statistic: 30.47 on 1 and 44 DF,  p-value: 1.703e-06
ac_cases_occ_2_model_log <- lm(log_cases_by_pop~ `percent more than 2 occupants`, ac_curr_cases_dem)

summary(ac_cases_occ_2_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 2 occupants`, 
##     data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.39455 -0.53594 -0.07895  0.55651  1.45703 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      -6.9985     0.1332  -52.53  < 2e-16 ***
## `percent more than 2 occupants`   0.6041     0.1432    4.22  0.00012 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6998 on 44 degrees of freedom
## Multiple R-squared:  0.2881, Adjusted R-squared:  0.2719 
## F-statistic:  17.8 on 1 and 44 DF,  p-value: 0.0001204
# SCC
fig_scc_cases_occ_2 <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~`percent more than 2 occupants`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent more than 2 occupants`, y = fitted(lm(cases_by_pop~ `percent more than 2 occupants`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent more than 2 occupants per room'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_occ_2
scc_cases_occ_2_model <- lm(cases_by_pop~ `percent more than 2 occupants`, scc_curr_cases_dem)

summary(scc_cases_occ_2_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent more than 2 occupants`, 
##     data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0019458 -0.0004797  0.0000242  0.0004582  0.0031705 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.0008980  0.0001737   5.169 3.65e-06 ***
## `percent more than 2 occupants` 0.0004990  0.0001978   2.522   0.0147 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000899 on 53 degrees of freedom
## Multiple R-squared:  0.1072, Adjusted R-squared:  0.09033 
## F-statistic: 6.362 on 1 and 53 DF,  p-value: 0.0147
scc_cases_occ_2_model_log <- lm(log_cases_by_pop~ `percent more than 2 occupants`, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_occ_2_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 2 occupants`, 
##     data = scc_curr_cases_dem %>% filter(cases != 0))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1767 -0.2703  0.0544  0.3493  1.3162 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      -6.9552     0.1108 -62.754   <2e-16 ***
## `percent more than 2 occupants`   0.3513     0.1308   2.686     0.01 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5263 on 46 degrees of freedom
## Multiple R-squared:  0.1356, Adjusted R-squared:  0.1168 
## F-statistic: 7.216 on 1 and 46 DF,  p-value: 0.01002

Units in structure.

# Alameda
fig_ac_cases_units_10 <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~`percent 10 or more units`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent 10 or more units`, y = fitted(lm(cases_by_pop~ `percent 10 or more units`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent 10 or more units per structure'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_units_10
ac_cases_units_10_model <- lm(cases_by_pop~ `percent 10 or more units`, ac_curr_cases_dem)

summary(ac_cases_units_10_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent 10 or more units`, data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0014979 -0.0011743 -0.0004578  0.0002407  0.0059375 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 1.900e-03  3.690e-04    5.15 5.87e-06 ***
## `percent 10 or more units` -4.902e-06  1.814e-05   -0.27    0.788    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001666 on 44 degrees of freedom
## Multiple R-squared:  0.001656,   Adjusted R-squared:  -0.02103 
## F-statistic: 0.07298 on 1 and 44 DF,  p-value: 0.7883
ac_cases_units_10_model_log <- lm(log_cases_by_pop~ `percent 10 or more units`, ac_curr_cases_dem)

summary(ac_cases_units_10_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent 10 or more units`, data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.33953 -0.71563 -0.00642  0.48547  1.78699 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -6.671549   0.183603 -36.337   <2e-16 ***
## `percent 10 or more units`  0.001887   0.009028   0.209    0.835    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8289 on 44 degrees of freedom
## Multiple R-squared:  0.0009919,  Adjusted R-squared:  -0.02171 
## F-statistic: 0.04369 on 1 and 44 DF,  p-value: 0.8354
# SCC
fig_scc_cases_units_10 <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~`percent 10 or more units`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent 10 or more units`, y = fitted(lm(cases_by_pop~ `percent 10 or more units`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent 10 or more units per structure'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_units_10
scc_cases_units_10_model <- lm(cases_by_pop~ `percent 10 or more units`, scc_curr_cases_dem)

summary(scc_cases_units_10_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent 10 or more units`, data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.411e-03 -5.609e-04 -9.125e-05  3.514e-04  3.037e-03 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.157e-03  1.827e-04   6.331 5.36e-08 ***
## `percent 10 or more units` 3.089e-06  7.282e-06   0.424    0.673    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009498 on 53 degrees of freedom
## Multiple R-squared:  0.003383,   Adjusted R-squared:  -0.01542 
## F-statistic: 0.1799 on 1 and 53 DF,  p-value: 0.6732
scc_cases_units_10_model_log <- lm(log_cases_by_pop~ `percent 10 or more units`, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_units_10_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent 10 or more units`, data = scc_curr_cases_dem %>% 
##     filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.22681 -0.36831 -0.03308  0.29044  1.27530 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -6.772782   0.123451 -54.862   <2e-16 ***
## `percent 10 or more units`  0.002021   0.005452   0.371    0.713    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5652 on 46 degrees of freedom
## Multiple R-squared:  0.002978,   Adjusted R-squared:  -0.0187 
## F-statistic: 0.1374 on 1 and 46 DF,  p-value: 0.7126

Try with more than one unit.

# Alameda
fig_ac_cases_units_1 <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~`percent more than 1 unit`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent more than 1 unit`, y = fitted(lm(cases_by_pop~ `percent more than 1 unit`, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent more than 1 unit per structure'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_units_1
ac_cases_units_1_model <- lm(cases_by_pop~ `percent more than 1 unit`, ac_curr_cases_dem)

summary(ac_cases_units_1_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent more than 1 unit`, data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0017272 -0.0011122 -0.0006374  0.0003547  0.0058540 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                1.623e-03  4.899e-04   3.314  0.00185 **
## `percent more than 1 unit` 5.257e-06  1.101e-05   0.477  0.63548   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001663 on 44 degrees of freedom
## Multiple R-squared:  0.005152,   Adjusted R-squared:  -0.01746 
## F-statistic: 0.2279 on 1 and 44 DF,  p-value: 0.6355
ac_cases_units_1_model_log <- lm(log_cases_by_pop~ `percent more than 1 unit`, ac_curr_cases_dem)

summary(ac_cases_units_1_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 1 unit`, data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.42109 -0.70629 -0.04902  0.51872  1.73989 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -6.746749   0.243625 -27.693   <2e-16 ***
## `percent more than 1 unit`  0.002697   0.005477   0.492    0.625    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8271 on 44 degrees of freedom
## Multiple R-squared:  0.005479,   Adjusted R-squared:  -0.01712 
## F-statistic: 0.2424 on 1 and 44 DF,  p-value: 0.6249
# SCC
fig_scc_cases_units_1 <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~`percent more than 1 unit`, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~`percent more than 1 unit`, y = fitted(lm(cases_by_pop~ `percent more than 1 unit`, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent more than 1 unit per structure'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_units_1
scc_cases_units_1_model <- lm(cases_by_pop~ `percent more than 1 unit`, scc_curr_cases_dem)

summary(scc_cases_units_1_model)
## 
## Call:
## lm(formula = cases_by_pop ~ `percent more than 1 unit`, data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0014604 -0.0005129 -0.0001351  0.0003362  0.0030268 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                1.053e-03  2.422e-04   4.347 6.29e-05 ***
## `percent more than 1 unit` 4.199e-06  5.437e-06   0.772    0.443    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009461 on 53 degrees of freedom
## Multiple R-squared:  0.01113,    Adjusted R-squared:  -0.007532 
## F-statistic: 0.5963 on 1 and 53 DF,  p-value: 0.4434
scc_cases_units_1_model_log <- lm(log_cases_by_pop~ `percent more than 1 unit`, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_units_1_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ `percent more than 1 unit`, data = scc_curr_cases_dem %>% 
##     filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.24974 -0.35563 -0.02577  0.29671  1.27341 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -6.795550   0.162824 -41.736   <2e-16 ***
## `percent more than 1 unit`  0.001513   0.003734   0.405    0.687    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5651 on 46 degrees of freedom
## Multiple R-squared:  0.003558,   Adjusted R-squared:  -0.0181 
## F-statistic: 0.1642 on 1 and 46 DF,  p-value: 0.6872

Not significant.

Try income.

# Alameda
fig_ac_cases_inc_75 <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~percent_under_75000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~percent_under_75000, y = fitted(lm(cases_by_pop~ percent_under_75000, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households earning less than $75,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_inc_75
ac_cases_inc_75_model <- lm(cases_by_pop~ percent_under_75000, ac_curr_cases_dem)

summary(ac_cases_inc_75_model)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_75000, data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0033557 -0.0007076 -0.0001242  0.0004247  0.0042952 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -6.313e-04  5.556e-04  -1.136    0.262    
## percent_under_75000  5.909e-05  1.247e-05   4.740 2.26e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001357 on 44 degrees of freedom
## Multiple R-squared:  0.338,  Adjusted R-squared:  0.323 
## F-statistic: 22.47 on 1 and 44 DF,  p-value: 2.265e-05
ac_cases_inc_75_model_log <- lm(log_cases_by_pop~ percent_under_75000, ac_curr_cases_dem)

summary(ac_cases_inc_75_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_75000, data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.19537 -0.35435  0.03465  0.46874  1.20218 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -7.823860   0.281026 -27.840  < 2e-16 ***
## percent_under_75000  0.028401   0.006305   4.504 4.86e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6861 on 44 degrees of freedom
## Multiple R-squared:  0.3156, Adjusted R-squared:    0.3 
## F-statistic: 20.29 on 1 and 44 DF,  p-value: 4.861e-05
# SCC
fig_scc_cases_inc_75 <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~percent_under_75000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~percent_under_75000, y = fitted(lm(cases_by_pop~ percent_under_75000, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households earning less than $75,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_inc_75
scc_cases_inc_75_model <- lm(cases_by_pop~ percent_under_75000, scc_curr_cases_dem)

summary(scc_cases_inc_75_model)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_75000, data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0020102 -0.0004037 -0.0001126  0.0002820  0.0032014 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         1.182e-05  4.095e-04   0.029  0.97707   
## percent_under_75000 3.639e-05  1.189e-05   3.061  0.00346 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000877 on 53 degrees of freedom
## Multiple R-squared:  0.1502, Adjusted R-squared:  0.1342 
## F-statistic:  9.37 on 1 and 53 DF,  p-value: 0.003459
scc_cases_inc_75_model_log <- lm(log_cases_by_pop~ percent_under_75000, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_inc_75_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_75000, data = scc_curr_cases_dem %>% 
##     filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.11075 -0.33640  0.02249  0.28161  1.34211 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -7.691623   0.244837 -31.415  < 2e-16 ***
## percent_under_75000  0.029067   0.007154   4.063 0.000187 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4856 on 46 degrees of freedom
## Multiple R-squared:  0.2641, Adjusted R-squared:  0.2481 
## F-statistic: 16.51 on 1 and 46 DF,  p-value: 0.0001869

Income as 100,000.

# Alameda
fig_ac_cases_inc_100 <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~percent_under_100000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~percent_under_100000, y = fitted(lm(cases_by_pop~ percent_under_100000, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households earning less than $100,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_inc_100
ac_cases_inc_100_model <- lm(cases_by_pop~ percent_under_100000, ac_curr_cases_dem)

summary(ac_cases_inc_100_model)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_100000, data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0030285 -0.0006396 -0.0001944  0.0004365  0.0041642 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.465e-03  6.486e-04  -2.259   0.0289 *  
## percent_under_100000  6.226e-05  1.172e-05   5.311 3.43e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001302 on 44 degrees of freedom
## Multiple R-squared:  0.3907, Adjusted R-squared:  0.3768 
## F-statistic: 28.21 on 1 and 44 DF,  p-value: 3.428e-06
ac_cases_inc_100_model_log <- lm(log_cases_by_pop~ percent_under_100000, ac_curr_cases_dem)

summary(ac_cases_inc_100_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_100000, data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.06459 -0.32849  0.03041  0.40543  1.15621 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -8.28040    0.32257 -25.670  < 2e-16 ***
## percent_under_100000  0.03098    0.00583   5.314 3.39e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6472 on 44 degrees of freedom
## Multiple R-squared:  0.3909, Adjusted R-squared:  0.3771 
## F-statistic: 28.24 on 1 and 44 DF,  p-value: 3.393e-06
# SCC
fig_scc_cases_inc_100 <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~percent_under_100000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~percent_under_100000, y = fitted(lm(cases_by_pop~ percent_under_100000, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households earning less than $100,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_inc_100
scc_cases_inc_100_model <- lm(cases_by_pop~ percent_under_100000, scc_curr_cases_dem)

summary(scc_cases_inc_100_model)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_100000, data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.827e-03 -4.416e-04 -5.580e-06  2.418e-04  3.129e-03 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)          -9.287e-05  4.555e-04  -0.204   0.8392   
## percent_under_100000  3.011e-05  1.015e-05   2.967   0.0045 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000881 on 53 degrees of freedom
## Multiple R-squared:  0.1425, Adjusted R-squared:  0.1263 
## F-statistic: 8.804 on 1 and 53 DF,  p-value: 0.004501
scc_cases_inc_100_model_log <- lm(cases_by_pop~ percent_under_100000, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_inc_100_model_log)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_100000, data = scc_curr_cases_dem %>% 
##     filter(cases != 0))
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010976 -0.0004645 -0.0001279  0.0001819  0.0029335 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -2.893e-04  4.195e-04  -0.690 0.493846    
## percent_under_100000  3.906e-05  9.428e-06   4.143 0.000145 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000757 on 46 degrees of freedom
## Multiple R-squared:  0.2717, Adjusted R-squared:  0.2559 
## F-statistic: 17.16 on 1 and 46 DF,  p-value: 0.0001453

Income as 125,000.

# Alameda
fig_ac_cases_inc_125 <- plot_ly(ac_curr_cases_dem) %>%
  add_trace(x = ~percent_under_125000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~percent_under_125000, y = fitted(lm(cases_by_pop~ percent_under_125000, ac_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households earning less than $125,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Alameda")

fig_ac_cases_inc_125
ac_cases_inc_125_model <- lm(cases_by_pop~ percent_under_125000, ac_curr_cases_dem)

summary(ac_cases_inc_125_model)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000, data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0028161 -0.0007231 -0.0002092  0.0004190  0.0043649 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -2.277e-03  7.816e-04  -2.913  0.00561 ** 
## percent_under_125000  6.556e-05  1.212e-05   5.412 2.45e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001292 on 44 degrees of freedom
## Multiple R-squared:  0.3996, Adjusted R-squared:  0.386 
## F-statistic: 29.29 on 1 and 44 DF,  p-value: 2.451e-06
ac_cases_inc_125_model_log <- lm(log_cases_by_pop~ percent_under_125000, ac_curr_cases_dem)

summary(ac_cases_inc_125_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000, data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.98666 -0.32432  0.01612  0.37468  1.22150 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -8.768576   0.377622 -23.221  < 2e-16 ***
## percent_under_125000  0.033972   0.005853   5.804 6.54e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6242 on 44 degrees of freedom
## Multiple R-squared:  0.4336, Adjusted R-squared:  0.4208 
## F-statistic: 33.69 on 1 and 44 DF,  p-value: 6.544e-07
# SCC
fig_scc_cases_inc_125 <- plot_ly(scc_curr_cases_dem) %>%
  add_trace(x = ~percent_under_125000, y = ~cases_by_pop, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~percent_under_125000, y = fitted(lm(cases_by_pop~ percent_under_125000, scc_curr_cases_dem)), mode = 'lines', showlegend = F) %>%
  layout(xaxis = list(title = 'Percent of households earning less than $125,000/yr'), yaxis = list(title = 'Current cases per capita'), title = "Santa Clara")

fig_scc_cases_inc_125
scc_cases_inc_125_model <- lm(cases_by_pop~ percent_under_125000, scc_curr_cases_dem)

summary(scc_cases_inc_125_model)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000, data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0016302 -0.0004194 -0.0000520  0.0002166  0.0031734 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)   
## (Intercept)          -3.116e-04  5.140e-04  -0.606  0.54695   
## percent_under_125000  2.898e-05  9.514e-06   3.046  0.00361 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008777 on 53 degrees of freedom
## Multiple R-squared:  0.149,  Adjusted R-squared:  0.1329 
## F-statistic: 9.277 on 1 and 53 DF,  p-value: 0.003611
scc_cases_inc_125_model_log <- lm(log_cases_by_pop~ percent_under_125000, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_inc_125_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000, data = scc_curr_cases_dem %>% 
##     filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.16009 -0.34031 -0.01414  0.23537  1.31855 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -7.957829   0.300718 -26.463  < 2e-16 ***
## percent_under_125000  0.023324   0.005596   4.168 0.000134 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4823 on 46 degrees of freedom
## Multiple R-squared:  0.2741, Adjusted R-squared:  0.2584 
## F-statistic: 17.37 on 1 and 46 DF,  p-value: 0.0001341

The higher thresholds are more effective for Alameda, SCC is about the same for 75,000 and 125,000. So overall, 125,000 seems best.

Try multiple regression analysis with these.

ac_cases_dem_model <- lm(cases_by_pop ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, ac_curr_cases_dem)

summary(ac_cases_dem_model)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000 + avg_household_size + 
##     pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, 
##     data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0015848 -0.0004709 -0.0001101  0.0004055  0.0028513 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                     2.735e-03  2.665e-03   1.026   0.3109   
## percent_under_125000            5.800e-05  1.662e-05   3.489   0.0012 **
## avg_household_size             -1.568e-03  8.867e-04  -1.768   0.0847 . 
## pop_density                    -1.091e-01  8.699e-02  -1.254   0.2171   
## `percent more than 1 occupant`  2.626e-04  7.589e-05   3.460   0.0013 **
## `percent more than 1 unit`     -4.230e-05  1.584e-05  -2.670   0.0109 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009469 on 40 degrees of freedom
## Multiple R-squared:  0.7068, Adjusted R-squared:  0.6702 
## F-statistic: 19.29 on 5 and 40 DF,  p-value: 9.935e-10
scc_cases_dem_model <- lm(cases_by_pop ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, scc_curr_cases_dem)

summary(scc_cases_dem_model)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000 + avg_household_size + 
##     pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, 
##     data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0016646 -0.0005053 -0.0000357  0.0003541  0.0034148 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                    -2.056e-04  1.719e-03  -0.120    0.905
## percent_under_125000           -4.247e-06  1.341e-05  -0.317    0.753
## avg_household_size              3.082e-04  5.482e-04   0.562    0.577
## pop_density                    -1.075e-02  7.314e-02  -0.147    0.884
## `percent more than 1 occupant`  8.505e-05  5.151e-05   1.651    0.105
## `percent more than 1 unit`      2.851e-06  1.149e-05   0.248    0.805
## 
## Residual standard error: 0.0008112 on 49 degrees of freedom
## Multiple R-squared:  0.3278, Adjusted R-squared:  0.2592 
## F-statistic: 4.779 on 5 and 49 DF,  p-value: 0.001232

Hmm, that’s interesting. Also with log of cases.

ac_cases_dem_model_log <- lm(log_cases_by_pop ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, ac_curr_cases_dem)

summary(ac_cases_dem_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000 + avg_household_size + 
##     pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, 
##     data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84684 -0.29885 -0.07084  0.27367  1.04048 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -7.611e+00  1.315e+00  -5.786 9.45e-07 ***
## percent_under_125000            4.318e-02  8.205e-03   5.263 5.11e-06 ***
## avg_household_size             -4.548e-01  4.376e-01  -1.039  0.30496    
## pop_density                    -1.175e+02  4.293e+01  -2.738  0.00918 ** 
## `percent more than 1 occupant`  7.064e-02  3.746e-02   1.886  0.06657 .  
## `percent more than 1 unit`     -1.607e-02  7.818e-03  -2.055  0.04643 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4673 on 40 degrees of freedom
## Multiple R-squared:  0.7114, Adjusted R-squared:  0.6753 
## F-statistic: 19.72 on 5 and 40 DF,  p-value: 7.33e-10
scc_cases_dem_model_log <- lm(log_cases_by_pop ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_dem_model_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000 + avg_household_size + 
##     pop_density + `percent more than 1 occupant` + `percent more than 1 unit`, 
##     data = scc_curr_cases_dem %>% filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.08976 -0.25029 -0.01391  0.26471  1.37277 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -9.447739   1.129114  -8.367 1.73e-10 ***
## percent_under_125000             0.016489   0.009430   1.748   0.0877 .  
## avg_household_size               0.581402   0.356598   1.630   0.1105    
## pop_density                    -29.946703  58.962583  -0.508   0.6142    
## `percent more than 1 occupant`  -0.018551   0.034780  -0.533   0.5966    
## `percent more than 1 unit`       0.008039   0.007354   1.093   0.2806    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4752 on 42 degrees of freedom
## Multiple R-squared:  0.3566, Adjusted R-squared:   0.28 
## F-statistic: 4.656 on 5 and 42 DF,  p-value: 0.001801

Just include the variables that were relevant before.

ac_cases_dem_model_2 <- lm(cases_by_pop ~ percent_under_125000 + avg_household_size  + `percent more than 1 occupant`, ac_curr_cases_dem)

summary(ac_cases_dem_model_2)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000 + avg_household_size + 
##     `percent more than 1 occupant`, data = ac_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0024512 -0.0006169 -0.0000824  0.0004135  0.0033460 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                    -3.546e-03  1.976e-03  -1.794   0.0799 .
## percent_under_125000            3.628e-05  1.702e-05   2.131   0.0390 *
## avg_household_size              8.022e-04  5.582e-04   1.437   0.1581  
## `percent more than 1 occupant`  1.227e-04  6.832e-05   1.796   0.0796 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.001051 on 42 degrees of freedom
## Multiple R-squared:  0.6209, Adjusted R-squared:  0.5938 
## F-statistic: 22.93 on 3 and 42 DF,  p-value: 5.987e-09
scc_cases_dem_model_2 <- lm(cases_by_pop ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant`, scc_curr_cases_dem)

summary(scc_cases_dem_model_2)
## 
## Call:
## lm(formula = cases_by_pop ~ percent_under_125000 + avg_household_size + 
##     `percent more than 1 occupant`, data = scc_curr_cases_dem)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0015811 -0.0005093 -0.0000437  0.0003465  0.0034956 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                     1.483e-04  8.118e-04   0.183  0.85581   
## percent_under_125000           -4.015e-06  1.269e-05  -0.316  0.75295   
## avg_household_size              1.927e-04  2.297e-04   0.839  0.40557   
## `percent more than 1 occupant`  9.362e-05  3.205e-05   2.921  0.00519 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0007959 on 51 degrees of freedom
## Multiple R-squared:  0.3266, Adjusted R-squared:  0.287 
## F-statistic: 8.245 on 3 and 51 DF,  p-value: 0.0001432

Log of cases.

ac_cases_dem_model_2_log <- lm(log_cases_by_pop ~ percent_under_125000 + avg_household_size  + `percent more than 1 occupant`, ac_curr_cases_dem)

summary(ac_cases_dem_model_2_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000 + avg_household_size + 
##     `percent more than 1 occupant`, data = ac_curr_cases_dem)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.80150 -0.28202 -0.01959  0.29973  1.37823 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -10.511209   1.019415 -10.311 4.46e-13 ***
## percent_under_125000             0.030664   0.008782   3.492  0.00114 ** 
## avg_household_size               0.676370   0.287973   2.349  0.02362 *  
## `percent more than 1 occupant`   0.006504   0.035247   0.185  0.85448    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5421 on 42 degrees of freedom
## Multiple R-squared:  0.5922, Adjusted R-squared:  0.5631 
## F-statistic: 20.33 on 3 and 42 DF,  p-value: 2.708e-08
scc_cases_dem_model_2_log <- lm(log_cases_by_pop ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant`, scc_curr_cases_dem %>% filter(cases != 0))

summary(scc_cases_dem_model_2_log)
## 
## Call:
## lm(formula = log_cases_by_pop ~ percent_under_125000 + avg_household_size + 
##     `percent more than 1 occupant`, data = scc_curr_cases_dem %>% 
##     filter(cases != 0))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.02152 -0.23485 -0.03309  0.27829  1.60619 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -8.390980   0.540434 -15.526   <2e-16 ***
## percent_under_125000            0.015706   0.008808   1.783   0.0815 .  
## avg_household_size              0.255588   0.158128   1.616   0.1132    
## `percent more than 1 occupant`  0.007314   0.021760   0.336   0.7384    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4732 on 44 degrees of freedom
## Multiple R-squared:  0.3317, Adjusted R-squared:  0.2861 
## F-statistic: 7.278 on 3 and 44 DF,  p-value: 0.0004564

Change in cases from baseline level

I will use 0.0004 cases/person as the baseline level, and look at change in cases 2 weeks after achieving that rate. Also get visits in this range too, using a 7 day lag on visits.

min_baseline_cases = 10
init_baseline_cases_by_pop = 0.0004
init_weeks_later = 2
init_lag_visits = 7
# load the visits data
ac_daily_visits_zip <- readRDS(paste0(sg_path, "weekly-patterns/v2/ac_zip_visits_daily_sum_hourly_03-09-20_05-24-20.rds"))

ac_cases_cutoff <- ac_cases_zip %>% 
    filter(cases >= min_baseline_cases) %>% # removing ones that didn't hit at least 10 cases per person
    filter(cases_by_pop >= init_baseline_cases_by_pop) %>%
    mutate(log_cases_by_pop = log(cases_by_pop))
  zipcodes_in_cutoff <- unique(ac_cases_cutoff$zipcode)
  
  # get the change in cases and total visit hours in the time period of interest
  later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  
  for (i in 1:length(zipcodes_in_cutoff)) {
    curr_zip <- zipcodes_in_cutoff[i]
    curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
    # first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well 
    change_cases_curr = NA
    change_log_cases_curr = NA
    total_visits_curr = NA
    if ((max(curr_vals$date) >= curr_vals$date[1] + init_weeks_later*7 + 1) & (max(ac_daily_visits_zip$date) >= curr_vals$date[1] + init_weeks_later*7 + 1 - init_lag_visits) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - init_lag_visits)) {
      # change in cases an amount of time later
    change_cases_curr = curr_vals$cases_by_pop[init_weeks_later*7 + 1] - curr_vals$cases_by_pop[1]
    change_log_cases_curr = curr_vals$log_cases_by_pop[init_weeks_later*7 + 1] - curr_vals$log_cases_by_pop[1]
    # get the total visits in the time range of interest
    visits_curr = ac_daily_visits_zip %>% 
      filter(zipcode == curr_zip) %>% 
      filter(date >= (curr_vals$date[1] - init_lag_visits) & date < (curr_vals$date[init_weeks_later*7 + 1] - init_lag_visits)) %>% 
      summarize(total_visits_high = sum(total_visits_high),
              total_visits_low = sum(total_visits_low)) %>%
      mutate(total_visits_avg = (total_visits_high + total_visits_low)/2)
    total_visits_curr <- visits_curr$total_visits_avg
    }
    later_cases_change[i] = change_cases_curr
    later_log_cases_change[i] = change_log_cases_curr
    total_visits[i] = total_visits_curr
  }
  
  # combine
  ac_data_change <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, stringsAsFactors = F) %>%
    filter(!is.na(later_cases_change) & !is.na(total_visits)) %>% # only select ones that had valid date ranges
    rename(zipcode = zipcodes_in_cutoff) %>%
    left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip)) %>%
    mutate(visits_per_capita = total_visits / total_pop_zip)
    
# get linear model values for cases and visits
  ac_visits_cases_change_model <- lm(later_cases_change ~ visits_per_capita, ac_data_change)

  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~visits_per_capita, y = fitted(lm(later_cases_change ~ visits_per_capita, ac_data_change)), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit-hours per person'), yaxis = list(title = 'Change in cases per person from baseline 0.0004'), title = "Alameda, 2 weeks after baseline")
  summary(ac_visits_cases_change_model)
## 
## Call:
## lm(formula = later_cases_change ~ visits_per_capita, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -4.761e-04 -1.728e-04 -6.878e-05  2.618e-04  6.193e-04 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -0.0004317  0.0003538  -1.220   0.2311  
## visits_per_capita  0.0000898  0.0000393   2.285   0.0289 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002873 on 33 degrees of freedom
## Multiple R-squared:  0.1366, Adjusted R-squared:  0.1104 
## F-statistic:  5.22 on 1 and 33 DF,  p-value: 0.02888
  # log
  ac_visits_cases_change_model_log <- lm(later_log_cases_change ~ visits_per_capita, ac_data_change)

  ac_data_change %>%
  plot_ly() %>%
  add_trace(x = ~visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~visits_per_capita, y = fitted(lm(later_log_cases_change ~ visits_per_capita, ac_data_change)), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model_log)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit-hours per person'), yaxis = list(title = 'Change in log(cases per person) from baseline 0.0004'), title = "Alameda, 2 weeks after baseline")
  summary(ac_visits_cases_change_model_log)
## 
## Call:
## lm(formula = later_log_cases_change ~ visits_per_capita, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60722 -0.21542 -0.01093  0.25297  0.61235 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        -0.4657     0.4177  -1.115   0.2730  
## visits_per_capita   0.1120     0.0464   2.415   0.0215 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3392 on 33 degrees of freedom
## Multiple R-squared:  0.1501, Adjusted R-squared:  0.1244 
## F-statistic:  5.83 on 1 and 33 DF,  p-value: 0.02146

Repeat the demographic variable correlations. Without plots this time to reduce space.

ac_data_change <- ac_data_change %>% left_join(ac_curr_cases_dem %>% dplyr::select(-c(date, cases, cases_by_pop, total_pop_zip, geometry)))

runCorrelationsAl <- function(ac_data_change) {

# household size
ac_cases_change_hhs_model <- lm(later_cases_change~ avg_household_size, ac_data_change)

print(summary(ac_cases_change_hhs_model))

# also do with log of cases
ac_cases_change_hhs_model_log <- lm(later_log_cases_change~ avg_household_size, ac_data_change)

print(summary(ac_cases_change_hhs_model_log))

# population density
ac_cases_change_pop_dens_model <- lm(later_cases_change~ pop_density, ac_data_change)

print(summary(ac_cases_change_pop_dens_model))

# also do with log of cases
ac_cases_change_pop_dens_model_log <- lm(later_log_cases_change~ pop_density, ac_data_change)

print(summary(ac_cases_change_pop_dens_model_log))

# occupants per room
ac_cases_change_occ_1_model <- lm(later_cases_change~ `percent more than 1 occupant`, ac_data_change)

print(summary(ac_cases_change_occ_1_model))

# also do with log of cases
ac_cases_change_occ_1_model_log <- lm(later_log_cases_change~ `percent more than 1 occupant`, ac_data_change)

print(summary(ac_cases_change_occ_1_model_log))

# more than 0.5 occupants
ac_cases_change_occ_0.5_model <- lm(later_cases_change~ `percent more than 0.5 occupants`, ac_data_change)

print(summary(ac_cases_change_occ_0.5_model))

# also do with log of cases
ac_cases_change_occ_0.5_model_log <- lm(later_log_cases_change~ `percent more than 0.5 occupants`, ac_data_change)

print(summary(ac_cases_change_occ_0.5_model_log))

# units in structure
ac_cases_change_units_10_model <- lm(later_cases_change~ `percent 10 or more units`, ac_data_change)

print(summary(ac_cases_change_units_10_model))

# also do with log of cases
ac_cases_change_units_10_model_log <- lm(later_log_cases_change~ `percent 10 or more units`, ac_data_change)

print(summary(ac_cases_change_units_10_model_log))

# more than 1 unit
ac_cases_change_units_1_model <- lm(later_cases_change~ `percent more than 1 unit`, ac_data_change)

print(summary(ac_cases_change_units_1_model))

# also do with log of cases
ac_cases_change_units_1_model_log <- lm(later_log_cases_change~ `percent more than 1 unit`, ac_data_change)

print(summary(ac_cases_change_units_1_model_log))

# income
ac_cases_change_inc_75_model <- lm(later_cases_change~ percent_under_75000, ac_data_change)

print(summary(ac_cases_change_inc_75_model))

# also do with log of cases
ac_cases_change_inc_75_model_log <- lm(later_log_cases_change~ percent_under_75000, ac_data_change)

print(summary(ac_cases_change_inc_75_model_log))

ac_cases_change_inc_100_model <- lm(later_cases_change~ percent_under_100000, ac_data_change)

print(summary(ac_cases_change_inc_100_model))

# also do with log of cases
ac_cases_change_inc_100_model_log <- lm(later_log_cases_change~ percent_under_100000, ac_data_change)

print(summary(ac_cases_change_inc_100_model_log))

ac_cases_change_inc_125_model <- lm(later_cases_change~ percent_under_125000, ac_data_change)

print(summary(ac_cases_change_inc_125_model))

# also do with log of cases
ac_cases_change_inc_125_model_log <- lm(later_log_cases_change~ percent_under_125000, ac_data_change)

print(summary(ac_cases_change_inc_125_model_log))
}

runCorrelationsAl(ac_data_change)
## 
## Call:
## lm(formula = later_cases_change ~ avg_household_size, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -4.156e-04 -2.063e-04 -4.247e-05  1.727e-04  5.461e-04 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)        -0.0004576  0.0003011   -1.52  0.13808   
## avg_household_size  0.0002842  0.0001022    2.78  0.00891 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002783 on 33 degrees of freedom
## Multiple R-squared:  0.1898, Adjusted R-squared:  0.1652 
## F-statistic: 7.728 on 1 and 33 DF,  p-value: 0.008909
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ avg_household_size, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58478 -0.26170  0.01507  0.25189  0.56696 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         -0.4458     0.3587  -1.243  0.22266   
## avg_household_size   0.3366     0.1218   2.764  0.00927 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3316 on 33 degrees of freedom
## Multiple R-squared:  0.188,  Adjusted R-squared:  0.1634 
## F-statistic: 7.639 on 1 and 33 DF,  p-value: 0.00927
## 
## 
## Call:
## lm(formula = later_cases_change ~ pop_density, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.663e-04 -2.714e-04 -9.122e-05  2.716e-04  7.693e-04 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0003661  0.0000860   4.257 0.000161 ***
## pop_density 0.0011890  0.0271285   0.044 0.965306    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0003092 on 33 degrees of freedom
## Multiple R-squared:  5.82e-05,   Adjusted R-squared:  -0.03024 
## F-statistic: 0.001921 on 1 and 33 DF,  p-value: 0.9653
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ pop_density, data = ac_data_change)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5225 -0.3242 -0.0900  0.3006  0.8031 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.5217     0.1023   5.099 1.38e-05 ***
## pop_density   4.6313    32.2748   0.143    0.887    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3679 on 33 degrees of freedom
## Multiple R-squared:  0.0006236,  Adjusted R-squared:  -0.02966 
## F-statistic: 0.02059 on 1 and 33 DF,  p-value: 0.8868
## 
## 
## Call:
## lm(formula = later_cases_change ~ `percent more than 1 occupant`, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -4.332e-04 -1.693e-04 -7.690e-06  1.320e-04  5.607e-04 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    8.081e-05  7.559e-05   1.069    0.293    
## `percent more than 1 occupant` 3.637e-05  8.011e-06   4.540  7.1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002426 on 33 degrees of freedom
## Multiple R-squared:  0.3845, Adjusted R-squared:  0.3658 
## F-statistic: 20.61 on 1 and 33 DF,  p-value: 7.104e-05
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 1 occupant`, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50233 -0.21460  0.05337  0.19407  0.71268 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    0.183581   0.088837   2.067   0.0467 *  
## `percent more than 1 occupant` 0.044128   0.009414   4.687 4.62e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2851 on 33 degrees of freedom
## Multiple R-squared:  0.3997, Adjusted R-squared:  0.3815 
## F-statistic: 21.97 on 1 and 33 DF,  p-value: 4.622e-05
## 
## 
## Call:
## lm(formula = later_cases_change ~ `percent more than 0.5 occupants`, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -4.428e-04 -2.473e-04 -8.570e-06  2.242e-04  5.800e-04 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                       -1.314e-04  1.992e-04  -0.660   0.5141  
## `percent more than 0.5 occupants`  1.143e-05  4.419e-06   2.587   0.0143 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000282 on 33 degrees of freedom
## Multiple R-squared:  0.1686, Adjusted R-squared:  0.1434 
## F-statistic: 6.693 on 1 and 33 DF,  p-value: 0.01427
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 0.5 occupants`, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.52310 -0.27526 -0.01809  0.28931  0.59815 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                       -0.069643   0.236506  -0.294    0.770  
## `percent more than 0.5 occupants`  0.013776   0.005246   2.626    0.013 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3347 on 33 degrees of freedom
## Multiple R-squared:  0.1728, Adjusted R-squared:  0.1478 
## F-statistic: 6.896 on 1 and 33 DF,  p-value: 0.013
## 
## 
## Call:
## lm(formula = later_cases_change ~ `percent 10 or more units`, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -4.013e-04 -2.548e-04 -8.722e-05  2.641e-04  7.579e-04 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 4.014e-04  8.043e-05   4.991 1.89e-05 ***
## `percent 10 or more units` -1.976e-06  3.750e-06  -0.527    0.602    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0003079 on 33 degrees of freedom
## Multiple R-squared:  0.008348,   Adjusted R-squared:  -0.0217 
## F-statistic: 0.2778 on 1 and 33 DF,  p-value: 0.6017
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ `percent 10 or more units`, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.56708 -0.30077 -0.07125  0.28156  0.78729 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 0.567209   0.095802   5.921 1.22e-06 ***
## `percent 10 or more units` -0.002069   0.004466  -0.463    0.646    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3668 on 33 degrees of freedom
## Multiple R-squared:  0.00646,    Adjusted R-squared:  -0.02365 
## F-statistic: 0.2146 on 1 and 33 DF,  p-value: 0.6463
## 
## 
## Call:
## lm(formula = later_cases_change ~ `percent more than 1 unit`, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.619e-04 -2.721e-04 -9.047e-05  2.704e-04  7.669e-04 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                3.614e-04  1.102e-04   3.280  0.00246 **
## `percent more than 1 unit` 2.031e-07  2.557e-06   0.079  0.93716   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0003092 on 33 degrees of freedom
## Multiple R-squared:  0.0001912,  Adjusted R-squared:  -0.03011 
## F-statistic: 0.006312 on 1 and 33 DF,  p-value: 0.9372
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 1 unit`, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51724 -0.32555 -0.09264  0.29872  0.79562 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.5162009  0.1311077   3.937 0.000402 ***
## `percent more than 1 unit` 0.0004527  0.0030420   0.149 0.882596    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3679 on 33 degrees of freedom
## Multiple R-squared:  0.0006707,  Adjusted R-squared:  -0.02961 
## F-statistic: 0.02215 on 1 and 33 DF,  p-value: 0.8826
## 
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_75000, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.950e-04 -1.698e-04 -8.015e-05  2.037e-04  5.911e-04 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         -3.869e-05  1.333e-04  -0.290  0.77345   
## percent_under_75000  9.459e-06  2.907e-06   3.254  0.00263 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002691 on 33 degrees of freedom
## Multiple R-squared:  0.2429, Adjusted R-squared:   0.22 
## F-statistic: 10.59 on 1 and 33 DF,  p-value: 0.002628
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_75000, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.51572 -0.22338 -0.05969  0.26202  0.74802 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)         0.041832   0.157987   0.265  0.79283   
## percent_under_75000 0.011402   0.003445   3.310  0.00227 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3189 on 33 degrees of freedom
## Multiple R-squared:  0.2492, Adjusted R-squared:  0.2265 
## F-statistic: 10.96 on 1 and 33 DF,  p-value: 0.002265
## 
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_100000, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.992e-04 -1.651e-04 -3.765e-05  1.924e-04  6.103e-04 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.839e-04  1.568e-04  -1.172 0.249400    
## percent_under_100000  1.011e-05  2.751e-06   3.674 0.000841 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002605 on 33 degrees of freedom
## Multiple R-squared:  0.2903, Adjusted R-squared:  0.2688 
## F-statistic:  13.5 on 1 and 33 DF,  p-value: 0.0008407
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_100000, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50882 -0.20316 -0.04563  0.24008  0.77156 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.134148   0.185533  -0.723 0.474749    
## percent_under_100000  0.012199   0.003254   3.749 0.000683 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3082 on 33 degrees of freedom
## Multiple R-squared:  0.2987, Adjusted R-squared:  0.2774 
## F-statistic: 14.05 on 1 and 33 DF,  p-value: 0.0006825
## 
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_125000, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.983e-04 -1.886e-04 -3.339e-05  1.615e-04  5.976e-04 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -3.364e-04  1.906e-04  -1.765 0.086766 .  
## percent_under_125000  1.090e-05  2.866e-06   3.803 0.000586 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002578 on 33 degrees of freedom
## Multiple R-squared:  0.3047, Adjusted R-squared:  0.2837 
## F-statistic: 14.46 on 1 and 33 DF,  p-value: 0.0005862
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.47456 -0.20218 -0.06555  0.23659  0.75644 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.318931   0.225242  -1.416 0.166161    
## percent_under_125000  0.013168   0.003388   3.887 0.000463 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3048 on 33 degrees of freedom
## Multiple R-squared:  0.3141, Adjusted R-squared:  0.2933 
## F-statistic: 15.11 on 1 and 33 DF,  p-value: 0.0004631

Try multiple regression analysis with these.

ac_cases_dem_model_visits <- lm(later_cases_change ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + visits_per_capita, ac_data_change)

summary(ac_cases_dem_model_visits)
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_125000 + avg_household_size + 
##     pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + 
##     visits_per_capita, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -3.055e-04 -1.512e-04 -2.944e-05  1.379e-04  6.612e-04 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                    -1.451e-04  9.952e-04  -0.146   0.8851  
## percent_under_125000            1.205e-05  4.409e-06   2.734   0.0107 *
## avg_household_size              1.204e-06  3.133e-04   0.004   0.9970  
## pop_density                    -4.907e-02  3.133e-02  -1.567   0.1284  
## `percent more than 1 occupant`  2.201e-05  2.407e-05   0.915   0.3683  
## `percent more than 1 unit`     -2.943e-06  5.490e-06  -0.536   0.5962  
## visits_per_capita              -2.341e-05  4.700e-05  -0.498   0.6224  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0002302 on 28 degrees of freedom
## Multiple R-squared:  0.5297, Adjusted R-squared:  0.429 
## F-statistic: 5.257 on 6 and 28 DF,  p-value: 0.0009801
ac_cases_dem_model_visits_log <- lm(later_log_cases_change ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + visits_per_capita, ac_data_change)

summary(ac_cases_dem_model_visits_log)
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000 + 
##     avg_household_size + pop_density + `percent more than 1 occupant` + 
##     `percent more than 1 unit` + visits_per_capita, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35512 -0.19725 -0.04894  0.16358  0.81897 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                     -0.079377   1.182353  -0.067    0.947  
## percent_under_125000             0.013729   0.005238   2.621    0.014 *
## avg_household_size              -0.032296   0.372196  -0.087    0.931  
## pop_density                    -52.065401  37.215592  -1.399    0.173  
## `percent more than 1 occupant`   0.028072   0.028599   0.982    0.335  
## `percent more than 1 unit`      -0.003730   0.006522  -0.572    0.572  
## visits_per_capita               -0.014785   0.055838  -0.265    0.793  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2735 on 28 degrees of freedom
## Multiple R-squared:  0.5313, Adjusted R-squared:  0.4309 
## F-statistic:  5.29 on 6 and 28 DF,  p-value: 0.00094

Huh, again interesting.

Just include the variables that were relevant before.

ac_cases_dem_model_visits_2 <- lm(later_cases_change ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant` + visits_per_capita, ac_data_change)

summary(ac_cases_dem_model_visits_2)
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_125000 + avg_household_size + 
##     `percent more than 1 occupant` + visits_per_capita, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0003573 -0.0001666 -0.0000494  0.0001466  0.0006220 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                    -9.940e-04  5.188e-04  -1.916   0.0650 .
## percent_under_125000            9.605e-06  4.235e-06   2.268   0.0307 *
## avg_household_size              2.392e-04  1.540e-04   1.553   0.1308  
## `percent more than 1 occupant`  4.693e-06  1.589e-05   0.295   0.7698  
## visits_per_capita               9.312e-07  4.520e-05   0.021   0.9837  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000234 on 30 degrees of freedom
## Multiple R-squared:  0.4793, Adjusted R-squared:  0.4099 
## F-statistic: 6.903 on 4 and 30 DF,  p-value: 0.0004594
ac_cases_dem_model_visits_log_2 <- lm(later_log_cases_change ~percent_under_125000 + avg_household_size + `percent more than 1 occupant` + visits_per_capita, ac_data_change)

summary(ac_cases_dem_model_visits_log_2)
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000 + 
##     avg_household_size + `percent more than 1 occupant` + visits_per_capita, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.37323 -0.20158 -0.06058  0.18980  0.77190 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                    -1.074013   0.611642  -1.756   0.0893 .
## percent_under_125000            0.011095   0.004993   2.222   0.0340 *
## avg_household_size              0.250766   0.181552   1.381   0.1774  
## `percent more than 1 occupant`  0.007669   0.018736   0.409   0.6852  
## visits_per_capita               0.011114   0.053290   0.209   0.8362  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2759 on 30 degrees of freedom
## Multiple R-squared:  0.489,  Adjusted R-squared:  0.4208 
## F-statistic: 7.177 on 4 and 30 DF,  p-value: 0.0003527

Try also with 5 week change in cases after the baseline level.

init_weeks_later = 5

  # get the change in cases and total visit hours in the time period of interest
  later_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  later_log_cases_change <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  total_visits <- vector(mode = "numeric", length = length(zipcodes_in_cutoff))
  
  for (i in 1:length(zipcodes_in_cutoff)) {
    curr_zip <- zipcodes_in_cutoff[i]
    curr_vals <- ac_cases_cutoff %>% filter(zipcode == curr_zip)
    # first need to check that the date ranges chosen here are valid for these zip codes - i.e. there is actually enough case data for this to be valid, and visits data as well 
    change_cases_curr = NA
    change_log_cases_curr = NA
    total_visits_curr = NA
    # check cases first
    if ((max(curr_vals$date) >= curr_vals$date[1] + init_weeks_later*7 + 1)) {
      # change in cases an amount of time later
    change_cases_curr = curr_vals$cases_by_pop[init_weeks_later*7 + 1] - curr_vals$cases_by_pop[1]
    change_log_cases_curr = curr_vals$log_cases_by_pop[init_weeks_later*7 + 1] - curr_vals$log_cases_by_pop[1]
    }
    # check visits
    if ((max(ac_daily_visits_zip$date) >= curr_vals$date[1] + init_weeks_later*7 + 1 - init_lag_visits) & (min(ac_daily_visits_zip$date) <= curr_vals$date[1] - init_lag_visits)) {
    # get the total visits in the time range of interest
    visits_curr = ac_daily_visits_zip %>% 
      filter(zipcode == curr_zip) %>% 
      filter(date >= (curr_vals$date[1] - init_lag_visits) & date < (curr_vals$date[init_weeks_later*7 + 1] - init_lag_visits)) %>% 
      summarize(total_visits_high = sum(total_visits_high),
              total_visits_low = sum(total_visits_low)) %>%
      mutate(total_visits_avg = (total_visits_high + total_visits_low)/2)
    total_visits_curr <- visits_curr$total_visits_avg
    }
    later_cases_change[i] = change_cases_curr
    later_log_cases_change[i] = change_log_cases_curr
    total_visits[i] = total_visits_curr
  }
  
  # combine
  ac_data_change_5 <- data.frame(zipcodes_in_cutoff, later_cases_change, later_log_cases_change, total_visits, stringsAsFactors = F) %>%
    filter(!is.na(later_cases_change)) %>% 
    rename(zipcode = zipcodes_in_cutoff) %>%
    left_join(ac_cases_zip %>% filter(date == max(date)) %>% dplyr::select(zipcode, total_pop_zip)) %>%
    mutate(visits_per_capita = total_visits / total_pop_zip)
    
# get linear model values for cases and visits
  ac_visits_cases_change_model_5 <- lm(later_cases_change ~ visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))

  ac_data_change_5 %>% filter(!is.na(visits_per_capita)) %>%
  plot_ly() %>%
  add_trace(x = ~visits_per_capita, y = ~later_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~visits_per_capita, y = fitted(lm(later_cases_change ~ visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model_5)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit-hours per person'), yaxis = list(title = 'Change in cases per person from baseline 0.0004'), title = "Alameda, 5 weeks after baseline")
  summary(ac_visits_cases_change_model_5)
## 
## Call:
## lm(formula = later_cases_change ~ visits_per_capita, data = ac_data_change_5 %>% 
##     filter(!is.na(visits_per_capita)))
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010994 -0.0005514 -0.0001878  0.0002261  0.0025410 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       -0.0016203  0.0011815  -1.371   0.1811  
## visits_per_capita  0.0001196  0.0000512   2.337   0.0268 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008618 on 28 degrees of freedom
## Multiple R-squared:  0.1632, Adjusted R-squared:  0.1333 
## F-statistic:  5.46 on 1 and 28 DF,  p-value: 0.02683
  # log
  ac_visits_cases_change_model_5_log <- lm(later_log_cases_change ~ visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))

  ac_data_change_5 %>% filter(!is.na(visits_per_capita)) %>%
  plot_ly() %>%
  add_trace(x = ~visits_per_capita, y = ~later_log_cases_change, type = 'scatter', mode = 'markers', text = ~zipcode) %>%
  add_trace(x = ~visits_per_capita, y = fitted(lm(later_log_cases_change ~ visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))), mode = 'lines', showlegend = F, text = paste("R squared:", summary.lm(ac_visits_cases_change_model_5_log)$r.squared)) %>%
  layout(xaxis = list(title = 'Total visit-hours per person'), yaxis = list(title = 'Change in log(cases/person) from baseline 0.0004'), title = "Alameda, 5 weeks after baseline")
  summary(ac_visits_cases_change_model_5_log)
## 
## Call:
## lm(formula = later_log_cases_change ~ visits_per_capita, data = ac_data_change_5 %>% 
##     filter(!is.na(visits_per_capita)))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.85287 -0.31588 -0.06965  0.30249  1.12272 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)       -0.91402    0.67267  -1.359  0.18506   
## visits_per_capita  0.08820    0.02915   3.026  0.00527 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4906 on 28 degrees of freedom
## Multiple R-squared:  0.2464, Adjusted R-squared:  0.2195 
## F-statistic: 9.155 on 1 and 28 DF,  p-value: 0.00527

Correlations analysis.

ac_data_change_5 <- ac_data_change_5 %>% left_join(ac_curr_cases_dem %>% dplyr::select(-c(date, cases, cases_by_pop, total_pop_zip, geometry)))

runCorrelationsAl(ac_data_change_5)
## 
## Call:
## lm(formula = later_cases_change ~ avg_household_size, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0011939 -0.0006046 -0.0001743  0.0003696  0.0024225 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        -0.0010847  0.0009633  -1.126   0.2691  
## avg_household_size  0.0007360  0.0003260   2.258   0.0314 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008647 on 30 degrees of freedom
## Multiple R-squared:  0.1452, Adjusted R-squared:  0.1167 
## F-statistic: 5.097 on 1 and 30 DF,  p-value: 0.03141
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ avg_household_size, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.98839 -0.31056 -0.09891  0.41259  1.12215 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         -0.3039     0.5822  -0.522   0.6054  
## avg_household_size   0.4688     0.1970   2.379   0.0239 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5226 on 30 degrees of freedom
## Multiple R-squared:  0.1587, Adjusted R-squared:  0.1307 
## F-statistic: 5.661 on 1 and 30 DF,  p-value: 0.02391
## 
## 
## Call:
## lm(formula = later_cases_change ~ pop_density, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0008933 -0.0007415 -0.0001685  0.0003240  0.0025253 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 0.0008821  0.0002707   3.259  0.00278 **
## pop_density 0.0713886  0.0853960   0.836  0.40978   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009245 on 30 degrees of freedom
## Multiple R-squared:  0.02276,    Adjusted R-squared:  -0.00981 
## F-statistic: 0.6988 on 1 and 30 DF,  p-value: 0.4098
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ pop_density, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74996 -0.48368  0.00446  0.29408  1.11668 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.9346     0.1642   5.693  3.3e-06 ***
## pop_density  51.0826    51.7917   0.986    0.332    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5607 on 30 degrees of freedom
## Multiple R-squared:  0.03141,    Adjusted R-squared:  -0.0008781 
## F-statistic: 0.9728 on 1 and 30 DF,  p-value: 0.3319
## 
## 
## Call:
## lm(formula = later_cases_change ~ `percent more than 1 occupant`, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010973 -0.0003509 -0.0001076  0.0002054  0.0023817 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    2.392e-05  2.189e-04   0.109    0.914    
## `percent more than 1 occupant` 1.263e-04  2.259e-05   5.591  4.4e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0006545 on 30 degrees of freedom
## Multiple R-squared:  0.5102, Adjusted R-squared:  0.4939 
## F-statistic: 31.25 on 1 and 30 DF,  p-value: 4.4e-06
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 1 occupant`, 
##     data = ac_data_change)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5820 -0.2072 -0.0329  0.1457  1.2277 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     0.40512    0.12740    3.18  0.00341 ** 
## `percent more than 1 occupant`  0.08007    0.01315    6.09 1.09e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.381 on 30 degrees of freedom
## Multiple R-squared:  0.5528, Adjusted R-squared:  0.5379 
## F-statistic: 37.08 on 1 and 30 DF,  p-value: 1.086e-06
## 
## 
## Call:
## lm(formula = later_cases_change ~ `percent more than 0.5 occupants`, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0011819 -0.0005668 -0.0001210  0.0002660  0.0024531 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                       -7.059e-04  6.350e-04  -1.112   0.2751   
## `percent more than 0.5 occupants`  3.992e-05  1.395e-05   2.862   0.0076 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0008289 on 30 degrees of freedom
## Multiple R-squared:  0.2145, Adjusted R-squared:  0.1883 
## F-statistic: 8.192 on 1 and 30 DF,  p-value: 0.007598
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 0.5 occupants`, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76471 -0.29498 -0.05544  0.29712  1.29441 
## 
## Coefficients:
##                                    Estimate Std. Error t value Pr(>|t|)   
## (Intercept)                       -0.139909   0.373492  -0.375  0.71060   
## `percent more than 0.5 occupants`  0.027172   0.008204   3.312  0.00242 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4875 on 30 degrees of freedom
## Multiple R-squared:  0.2677, Adjusted R-squared:  0.2433 
## F-statistic: 10.97 on 1 and 30 DF,  p-value: 0.002423
## 
## 
## Call:
## lm(formula = later_cases_change ~ `percent 10 or more units`, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010631 -0.0006383 -0.0001908  0.0002794  0.0027544 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 1.185e-03  2.560e-04   4.627 6.66e-05 ***
## `percent 10 or more units` -7.231e-06  1.162e-05  -0.622    0.538    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009292 on 30 degrees of freedom
## Multiple R-squared:  0.01274,    Adjusted R-squared:  -0.02017 
## F-statistic: 0.3872 on 1 and 30 DF,  p-value: 0.5385
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ `percent 10 or more units`, 
##     data = ac_data_change)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8909 -0.4010 -0.0199  0.3763  1.2849 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 1.124364   0.156309   7.193 5.27e-08 ***
## `percent 10 or more units` -0.003591   0.007094  -0.506    0.616    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5673 on 30 degrees of freedom
## Multiple R-squared:  0.00847,    Adjusted R-squared:  -0.02458 
## F-statistic: 0.2563 on 1 and 30 DF,  p-value: 0.6164
## 
## 
## Call:
## lm(formula = later_cases_change ~ `percent more than 1 unit`, 
##     data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0009389 -0.0006809 -0.0001951  0.0002303  0.0027043 
## 
## Coefficients:
##                             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                8.958e-04  3.559e-04   2.517   0.0174 *
## `percent more than 1 unit` 4.272e-06  8.087e-06   0.528   0.6012  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0009309 on 30 degrees of freedom
## Multiple R-squared:  0.009214,   Adjusted R-squared:  -0.02381 
## F-statistic: 0.279 on 1 and 30 DF,  p-value: 0.6012
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ `percent more than 1 unit`, 
##     data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78978 -0.44547  0.00088  0.28860  1.24165 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                0.936981   0.216271   4.332 0.000152 ***
## `percent more than 1 unit` 0.003246   0.004914   0.661 0.513895    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5656 on 30 degrees of freedom
## Multiple R-squared:  0.01434,    Adjusted R-squared:  -0.01852 
## F-statistic: 0.4364 on 1 and 30 DF,  p-value: 0.5139
## 
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_75000, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0011503 -0.0005204 -0.0001303  0.0002934  0.0019030 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -4.723e-04  4.020e-04  -1.175 0.249313    
## percent_under_75000  3.474e-05  8.587e-06   4.045 0.000337 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0007523 on 30 degrees of freedom
## Multiple R-squared:  0.353,  Adjusted R-squared:  0.3314 
## F-statistic: 16.36 on 1 and 30 DF,  p-value: 0.0003371
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_75000, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.83740 -0.31902 -0.05253  0.18362  0.92196 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.052022   0.233235   0.223    0.825    
## percent_under_75000 0.022896   0.004982   4.596 7.27e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4364 on 30 degrees of freedom
## Multiple R-squared:  0.4132, Adjusted R-squared:  0.3936 
## F-statistic: 21.13 on 1 and 30 DF,  p-value: 7.268e-05
## 
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_100000, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0010894 -0.0005129 -0.0001348  0.0002288  0.0019060 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -9.160e-04  4.751e-04  -1.928 0.063350 .  
## percent_under_100000  3.538e-05  8.173e-06   4.329 0.000153 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0007337 on 30 degrees of freedom
## Multiple R-squared:  0.3845, Adjusted R-squared:  0.364 
## F-statistic: 18.74 on 1 and 30 DF,  p-value: 0.0001534
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_100000, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80272 -0.28174 -0.04411  0.17096  0.92320 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.256988   0.270652  -0.950     0.35    
## percent_under_100000  0.023618   0.004656   5.072  1.9e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.418 on 30 degrees of freedom
## Multiple R-squared:  0.4617, Adjusted R-squared:  0.4437 
## F-statistic: 25.73 on 1 and 30 DF,  p-value: 1.9e-05
## 
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_125000, data = ac_data_change)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0009168 -0.0004999 -0.0001158  0.0001881  0.0020116 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -1.347e-03  5.854e-04  -2.301 0.028526 *  
## percent_under_125000  3.661e-05  8.669e-06   4.223 0.000206 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0007406 on 30 degrees of freedom
## Multiple R-squared:  0.3728, Adjusted R-squared:  0.3519 
## F-statistic: 17.83 on 1 and 30 DF,  p-value: 0.0002062
## 
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000, data = ac_data_change)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69702 -0.25095 -0.05153  0.15852  0.98442 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -0.592856   0.326298  -1.817   0.0792 .  
## percent_under_125000  0.025171   0.004833   5.209 1.29e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4129 on 30 degrees of freedom
## Multiple R-squared:  0.4749, Adjusted R-squared:  0.4574 
## F-statistic: 27.13 on 1 and 30 DF,  p-value: 1.292e-05

Try multiple regression analysis with these, filtering to valid visits data ranges.

ac_cases_dem_model_visits_5 <- lm(later_cases_change ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))

summary(ac_cases_dem_model_visits_5)
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_125000 + avg_household_size + 
##     pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + 
##     visits_per_capita, data = ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0009177 -0.0003691 -0.0001696  0.0001847  0.0017177 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                     3.168e-03  2.960e-03   1.070   0.2956  
## percent_under_125000            2.169e-05  1.454e-05   1.492   0.1492  
## avg_household_size             -1.288e-03  9.857e-04  -1.307   0.2042  
## pop_density                    -8.309e-02  1.008e-01  -0.824   0.4182  
## `percent more than 1 occupant`  1.784e-04  7.317e-05   2.438   0.0229 *
## `percent more than 1 unit`     -2.438e-05  1.692e-05  -1.441   0.1630  
## visits_per_capita              -3.594e-06  6.854e-05  -0.052   0.9586  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000672 on 23 degrees of freedom
## Multiple R-squared:  0.582,  Adjusted R-squared:  0.473 
## F-statistic: 5.338 on 6 and 23 DF,  p-value: 0.001409
ac_cases_dem_model_visits_log_5 <- lm(later_log_cases_change ~ percent_under_125000 + avg_household_size + pop_density + `percent more than 1 occupant` + `percent more than 1 unit` + visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))

summary(ac_cases_dem_model_visits_log_5)
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000 + 
##     avg_household_size + pop_density + `percent more than 1 occupant` + 
##     `percent more than 1 unit` + visits_per_capita, data = ac_data_change_5 %>% 
##     filter(!is.na(visits_per_capita)))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59625 -0.20752 -0.04997  0.09367  0.86935 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      1.166444   1.606104   0.726   0.4750  
## percent_under_125000             0.016656   0.007887   2.112   0.0458 *
## avg_household_size              -0.643489   0.534877  -1.203   0.2412  
## pop_density                    -39.979506  54.701297  -0.731   0.4722  
## `percent more than 1 occupant`   0.083776   0.039707   2.110   0.0460 *
## `percent more than 1 unit`      -0.011903   0.009181  -1.296   0.2077  
## visits_per_capita                0.024724   0.037191   0.665   0.5128  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3646 on 23 degrees of freedom
## Multiple R-squared:  0.6581, Adjusted R-squared:  0.5689 
## F-statistic: 7.378 on 6 and 23 DF,  p-value: 0.0001733

Just include the variables that were relevant before.

ac_cases_dem_model_visits_5_2 <- lm(later_cases_change ~ percent_under_125000 + avg_household_size + `percent more than 1 occupant` + visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))

summary(ac_cases_dem_model_visits_5_2)
## 
## Call:
## lm(formula = later_cases_change ~ percent_under_125000 + avg_household_size + 
##     `percent more than 1 occupant` + visits_per_capita, data = ac_data_change_5 %>% 
##     filter(!is.na(visits_per_capita)))
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.0008467 -0.0004358 -0.0001939  0.0002918  0.0022880 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                    -1.066e-03  1.552e-03  -0.687   0.4984  
## percent_under_125000            1.580e-05  1.362e-05   1.160   0.2569  
## avg_household_size              5.777e-07  5.367e-04   0.001   0.9991  
## `percent more than 1 occupant`  8.892e-05  4.998e-05   1.779   0.0874 .
## visits_per_capita               1.594e-05  6.370e-05   0.250   0.8045  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0006881 on 25 degrees of freedom
## Multiple R-squared:  0.5237, Adjusted R-squared:  0.4475 
## F-statistic: 6.871 on 4 and 25 DF,  p-value: 0.0007104
ac_cases_dem_model_visits_log_5_2 <- lm(later_log_cases_change ~percent_under_125000 + avg_household_size + `percent more than 1 occupant` + visits_per_capita, ac_data_change_5 %>% filter(!is.na(visits_per_capita)))

summary(ac_cases_dem_model_visits_log_5_2)
## 
## Call:
## lm(formula = later_log_cases_change ~ percent_under_125000 + 
##     avg_household_size + `percent more than 1 occupant` + visits_per_capita, 
##     data = ac_data_change_5 %>% filter(!is.na(visits_per_capita)))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.65564 -0.14940 -0.06488  0.06747  1.14641 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                    -0.896615   0.831895  -1.078   0.2914  
## percent_under_125000            0.013816   0.007301   1.892   0.0701 .
## avg_household_size             -0.014984   0.287671  -0.052   0.9589  
## `percent more than 1 occupant`  0.040170   0.026790   1.499   0.1463  
## visits_per_capita               0.034096   0.034144   0.999   0.3276  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3688 on 25 degrees of freedom
## Multiple R-squared:  0.6198, Adjusted R-squared:  0.5589 
## F-statistic: 10.19 on 4 and 25 DF,  p-value: 4.927e-05